#!/bin/bash
# Begin tagger
##############
#$# creator : Devon Smith
#%# email: smithde@oclc.org
#$# created : 2009-07-20
#$# title : tagger
#$# description : Tag the named entities in a set of files
########## Define a usage function 
# Print a one-line usage summary on stdout.
# Globals:   this (read) - name of this script; when it is unset or empty the
#            basename of $0 is shown instead
# Arguments: $1 - pass the literal string "exit" to end the script (status 0)
#            after the summary has been printed
function usage {
    # The script takes at most one argument: -h for this help, or the
    # output format that is handed on to the tagger.
    echo "Usage: ${this:-${0##*/}} [ -h | format ]"
    if [ "$1" = "exit" ]; then exit 0; fi
}
########## Trace helpers; set verbose (below) to "false" to turn verbose off
# Print all arguments as a single line on standard output.
echo_stdout() {
    echo "$@"
}
# Print all arguments as a single line on standard error; nothing goes to stdout.
echo_stderr() {
    echo "$@" 1>&2
}
# Trace switch: the value of $verbose is run as a command in front of every
# trace message in this script. "false" throws the message away; set it to
# 'echo_stderr' or 'echo_stdout' to see output trace info on that stream.
verbose='false'
"$verbose" Verbose output on "${verbose##*_}"

########## Setup some common paths
# The absolute, canonical ( no ".." ) path to this script
canonical=$(cd -P -- "$(dirname -- "$0")" && printf '%s\n' "$(pwd -P)/$(basename -- "$0")")
# Just the filename of this script.
# The path is quoted (and guarded with --) so that an install location
# containing spaces or glob characters is passed to basename as one operand.
this=$(basename -- "$canonical")
$verbose "This: $this"
# The directory this script is in (same quoting rationale as above)
here=$(dirname -- "$canonical")
$verbose "Here: $here"

########## When the script is being executed
# Read the clock once and split the result, so the date and the time always
# describe the same instant (two separate date calls could straddle midnight).
timestamp=$(date '+%F %H:%M:%S')
# Date part, YYYY-MM-DD: everything before the space
today=${timestamp%% *}
# Time part, HH:MM:SS: everything after the space
now=${timestamp##* }
$verbose "Time: $today $now"
##########
# Print the usage text and stop when the first argument is -h.
case "$1" in
    -h) usage 'exit' ;;
esac
##########
# Path of the java executable found on PATH; empty when there is none.
# command -v is a shell builtin, so this does not depend on the external
# 'which' tool being installed and prints nothing at all on a miss.
JAVA=$(command -v java)

# Output format passed on to the tagger class: the first command-line
# argument when it is non-empty, otherwise text/x-ner-markup.
format=${1:-text/x-ner-markup}

# The module directory is the parent of the directory this script is in.
# Operands are quoted so a path containing spaces reaches dirname intact.
moduledir=$(dirname -- "$here")
$verbose "Module Dir: $moduledir"

# The library directory is "lib" inside the parent of the module directory.
libdir=$(dirname -- "$moduledir")/lib
$verbose "Library Dir: $libdir"

# Jar files, all looked up in $libdir, that form the Java classpath
jars=(oclc.ner.jar uiuc.ner.jar uiuc.ner.model.jar LBJ2.jar LBJ2Library.jar)
# Append every jar to CP, colon-separated. ${CP:+$CP:} inserts the separator
# only when CP already has content, so a CP value that is already set (for
# example inherited from the environment) is kept as a prefix.
# "${jars[@]}" yields exactly one word per array element, with no word
# splitting or globbing applied to the names.
for jar in "${jars[@]}"; do
    CP=${CP:+$CP:}$libdir/$jar
done
$verbose "Classpath: $CP"

########## Configurable variables
# Where the files to be tagged are read from; tagger.prop lives here as well.
input_dir='input'
# Where the tagged files are written, each under the same name as its input file.
output_dir='output'
########## Less configurable variables
# Value given to java's -Xmx option. The trainer requires a lot of memory, so
# keep this number as high as your system will allow.
# See java documentation for valid values.
MEMORY='2000m'
##########

CLASS=org.oclc.gateman.CmdLineTagger
# Usage: org.oclc.gateman.CmdLineTagger input-dir output-dir [ format ]

# Stop with a clear message when no java executable was located. Without this
# check the command line below would start with "-Xmx..." and the shell would
# report that as the missing command. 127 is the shell's own
# "command not found" status, so the exit code is unchanged.
if [ ! -x "$JAVA" ]; then
    echo "${0##*/}: java not found in PATH" >&2
    exit 127
fi

# Hold the command line in an array so that every argument stays a single
# word, even if a path or the format contains spaces or glob characters.
cmd=("$JAVA" "-Xmx$MEMORY" -cp "$CP" "$CLASS" "$input_dir" "$output_dir" "$format")
$verbose "Command: ${cmd[*]}"
# Last command of the script: its exit status becomes the script's status.
"${cmd[@]}"

############
# End tagger
# vim:ts=4:indentexpr=
